import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
def _plot_prosody(y, sr, pitches, rms_norm, hop_length):
    """Draw the pitch map, the waveform and the normalized RMS energy."""
    plt.figure(figsize=(12, 8))

    # Plot pitch candidates (frequency bin x frame matrix from piptrack)
    plt.subplot(3, 1, 1)
    librosa.display.specshow(
        pitches, sr=sr, hop_length=hop_length, x_axis='time', y_axis='linear'
    )
    plt.colorbar()
    plt.title('Pitch')

    # Plot waveform: one point per sample, so time = sample index / sr
    plt.subplot(3, 1, 2)
    plt.plot(np.arange(len(y)) / sr, y)
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.title('Waveform')

    # Plot RMS energy: one point per analysis frame, so the time axis must be
    # derived from the hop length; dividing the frame index by sr alone would
    # compress the curve by a factor of hop_length.
    plt.subplot(3, 1, 3)
    frame_times = librosa.frames_to_time(
        np.arange(len(rms_norm)), sr=sr, hop_length=hop_length
    )
    plt.plot(frame_times, rms_norm)
    plt.xlabel('Time (s)')
    plt.ylabel('RMS Energy')
    plt.title('RMS Energy')

    plt.tight_layout()
    plt.show()


def analyze_prosody(audio_file, *, show=True):
    """Extract simple prosodic features (pitch, tempo, energy) from a recording.

    Args:
        audio_file: Path of the audio file, passed straight to ``librosa.load``.
        show: When true (the default), also display a three-panel figure with
            the pitch map, the waveform and the normalized RMS energy.

    Returns:
        A ``(pitch, tempo, rms_norm)`` tuple where ``pitch`` is the mean of the
        pitch candidates whose magnitude exceeds 90% of the strongest one (in
        Hz, ``nan`` when the signal has no energy), ``tempo`` is the estimated
        tempo in BPM as a plain float, and ``rms_norm`` is the per-frame RMS
        energy rescaled to the 0-1 range (all zeros when the energy is
        constant).
    """
    # Load audio file
    y, sr = librosa.load(audio_file)

    # 512 is librosa's default hop; it is named here so the frame-based
    # features and the time axis used for plotting are guaranteed to agree.
    hop_length = 512

    # Extract pitch candidates using librosa's pitch tracking
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, hop_length=hop_length)

    # Average the pitch (in Hz) over the strongest candidates only. A silent
    # signal has no candidate above the threshold, so report nan explicitly
    # instead of taking the mean of an empty selection.
    peak = np.max(magnitudes) if magnitudes.size else 0.0
    if peak > 0:
        pitch = float(np.mean(pitches[magnitudes > peak * 0.9]))
    else:
        pitch = float('nan')

    # Extract tempo (pace). Depending on the librosa version the tempo comes
    # back as a scalar or as a one-element array; normalize it to a float.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = float(np.atleast_1d(tempo)[0])

    # Calculate root mean square (RMS) energy per frame
    rms = librosa.feature.rms(y=y, hop_length=hop_length)[0]

    # Normalize RMS to 0-1 range, guarding against a flat energy curve which
    # would otherwise divide by zero.
    rms_floor = np.min(rms)
    rms_span = np.max(rms) - rms_floor
    if rms_span > 0:
        rms_norm = (rms - rms_floor) / rms_span
    else:
        rms_norm = np.zeros_like(rms)

    if show:
        _plot_prosody(y, sr, pitches, rms_norm, hop_length)

    return pitch, tempo, rms_norm
# Example usage: analyze a sample recording and report its summary figures
audio_file = 'test.wav'
pitch, tempo, rms_norm = analyze_prosody(audio_file)
# Each row is printed as "<label> <value> <unit>"
for label, value, unit in (
    ("Average pitch:", pitch, "Hz"),
    ("Tempo (pace):", tempo, "BPM"),
):
    print(label, value, unit)